// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#include "aries/parquet/encryption/type_fwd.h"
#include <aries/parquet/platform.h>
#include <aries/parquet/properties.h>
#include <aries/parquet/schema.h>
#include <aries/parquet/types.h>

namespace parquet {
    class ColumnDescriptor;
    class EncodedStatistics;
    class FileCryptoMetaData;
    class Statistics;
    class SchemaDescriptor;

    namespace schema {
        class ColumnPath;
    } // namespace schema

    using KeyValueMetadata = ::nebula::KeyValueMetadata;

    /// \brief Parsed representation of the "created by" writer string stored in
    /// Parquet file metadata (application name, build, and semantic version).
    ///
    /// Used to detect writer versions with known correctness issues (e.g. bad
    /// statistics) so readers can decide whether on-disk metadata is trustworthy.
    class PARQUET_EXPORT ApplicationVersion {
    public:
        // Known Versions with Issues.
        // Each accessor returns the first writer version in which the named
        // upstream issue (e.g. PARQUET-251) is fixed; compare the file's writer
        // version against these before trusting affected metadata.
        static const ApplicationVersion &PARQUET_251_FIXED_VERSION();

        static const ApplicationVersion &PARQUET_816_FIXED_VERSION();

        static const ApplicationVersion &PARQUET_CPP_FIXED_STATS_VERSION();

        static const ApplicationVersion &PARQUET_MR_FIXED_STATS_VERSION();

        static const ApplicationVersion &PARQUET_CPP_10353_FIXED_VERSION();

        // Application that wrote the file. e.g. "IMPALA"
        std::string application_;
        // Build name
        std::string build_;

        // Version of the application that wrote the file, expressed as
        // (<major>.<minor>.<patch>). Unmatched parts default to 0.
        // "1.2.3"    => {1, 2, 3}
        // "1.2"      => {1, 2, 0}
        // "1.2-cdh5" => {1, 2, 0}
        struct {
            int major;
            int minor;
            int patch;
            // Unclassified remainder of the version string, if any
            // (exact contents depend on the parser implementation).
            std::string unknown;
            // Pre-release tag, e.g. the "rc1" in "1.2.3-rc1", if any.
            std::string pre_release;
            // Build metadata, e.g. the "abc" in "1.2.3+abc", if any.
            std::string build_info;
        } version;

        ApplicationVersion() = default;

        /// \brief Parse a full "created by" string, e.g.
        /// "parquet-mr version 1.8.0 (build abcd)".
        explicit ApplicationVersion(const std::string &created_by);

        /// \brief Construct from already-parsed application name and version parts.
        ApplicationVersion(std::string application, int major, int minor, int patch);

        // Returns true if version is strictly less than other_version
        bool VersionLt(const ApplicationVersion &other_version) const;

        // Returns true if version is strictly equal with other_version
        bool VersionEq(const ApplicationVersion &other_version) const;

        // Checks if the Version has the correct statistics for a given column.
        // NOTE(review): `statistics` is taken by non-const reference; upstream
        // Arrow takes it by const& — confirm whether the implementation mutates it.
        bool HasCorrectStatistics(Type::type primitive, EncodedStatistics &statistics,
                                  SortOrder::type sort_order = SortOrder::SIGNED) const;
    };

    /// \brief Per-column encryption information decoded from serialized
    /// column crypto metadata.
    class PARQUET_EXPORT ColumnCryptoMetaData {
    public:
        /// \brief Construct from serialized metadata bytes.
        static std::unique_ptr<ColumnCryptoMetaData> Make(const uint8_t *metadata);

        ~ColumnCryptoMetaData();

        /// \brief Structural equality against another instance.
        bool Equals(const ColumnCryptoMetaData &other) const;

        /// \brief Dotted path of the column in the schema.
        std::shared_ptr<schema::ColumnPath> path_in_schema() const;

        /// \brief True if the column is encrypted with the footer key;
        /// false if it uses a column-specific key.
        bool encrypted_with_footer_key() const;

        /// \brief Key metadata identifying the column-specific key
        /// (relevant when encrypted_with_footer_key() is false).
        const std::string &key_metadata() const;

    private:
        explicit ColumnCryptoMetaData(const uint8_t *metadata);

        // PIMPL Idiom
        class ColumnCryptoMetaDataImpl;
        std::unique_ptr<ColumnCryptoMetaDataImpl> impl_;
    };

    /// \brief Public struct for Thrift PageEncodingStats in ColumnChunkMetaData
    struct PageEncodingStats {
        /// Type of the pages counted (data page, dictionary page, ...)
        PageType::type page_type;
        /// Encoding used by the counted pages
        Encoding::type encoding;
        /// Number of pages with this (page_type, encoding) combination
        int32_t count;
    };

    /// \brief Public struct for location to page index in ColumnChunkMetaData.
    struct IndexLocation {
        /// File offset of the given index, in bytes (from the beginning of the file)
        int64_t offset;
        /// Length of the given index, in bytes
        int32_t length;
    };

    /// \brief ColumnChunkMetaData is a proxy around format::ColumnChunkMetaData.
    class PARQUET_EXPORT ColumnChunkMetaData {
    public:
        // API convenience to get a MetaData accessor

        TURBO_DEPRECATED("Use the ReaderProperties-taking overload")

        static std::unique_ptr<ColumnChunkMetaData> Make(
            const void *metadata, const ColumnDescriptor *descr,
            const ApplicationVersion *writer_version, int16_t row_group_ordinal = -1,
            int16_t column_ordinal = -1,
            std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr);

        /// \brief Create a ColumnChunkMetaData from a serialized thrift message.
        ///
        /// file_decryptor may be null for unencrypted files; the row group and
        /// column ordinals identify this chunk's position within the file.
        static std::unique_ptr<ColumnChunkMetaData> Make(
            const void *metadata, const ColumnDescriptor *descr,
            const ReaderProperties &properties = default_reader_properties(),
            const ApplicationVersion *writer_version = nullptr, int16_t row_group_ordinal = -1,
            int16_t column_ordinal = -1,
            std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr);

        ~ColumnChunkMetaData();

        /// \brief Structural equality against another chunk's metadata.
        bool Equals(const ColumnChunkMetaData &other) const;

        // column chunk
        /// \brief Byte offset of this column chunk within the file.
        int64_t file_offset() const;

        // parameter is only used when a dataset is spread across multiple files
        const std::string &file_path() const;

        // column metadata
        /// \brief True if the column-level metadata is available.
        bool is_metadata_set() const;

        /// \brief Physical type of the column.
        Type::type type() const;

        /// \brief Number of values in this column chunk.
        int64_t num_values() const;

        /// \brief Dotted path to this column in the schema.
        std::shared_ptr<schema::ColumnPath> path_in_schema() const;

        /// \brief True if statistics are set for this chunk.
        bool is_stats_set() const;

        /// \brief Statistics for this chunk; check is_stats_set() first.
        std::shared_ptr<Statistics> statistics() const;

        /// \brief Compression codec used for this chunk's pages.
        CompressionType compression() const;

        // Indicate if the ColumnChunk compression is supported by the current
        // compiled parquet library.
        bool can_decompress() const;

        /// \brief All encodings used by pages in this chunk.
        const std::vector<Encoding::type> &encodings() const;

        /// \brief Per-(page type, encoding) page counts.
        const std::vector<PageEncodingStats> &encoding_stats() const;

        /// \brief File offset of the bloom filter, if present.
        std::optional<int64_t> bloom_filter_offset() const;

        /// \brief Length of the bloom filter in bytes, if present.
        std::optional<int64_t> bloom_filter_length() const;

        bool has_dictionary_page() const;

        /// \brief File offset of the dictionary page; check has_dictionary_page() first.
        int64_t dictionary_page_offset() const;

        /// \brief File offset of the first data page.
        int64_t data_page_offset() const;

        bool has_index_page() const;

        /// \brief File offset of the index page; check has_index_page() first.
        int64_t index_page_offset() const;

        int64_t total_compressed_size() const;

        int64_t total_uncompressed_size() const;

        /// \brief Encryption metadata for this column, if any.
        std::unique_ptr<ColumnCryptoMetaData> crypto_metadata() const;

        /// \brief Location of the column index (page index), if present.
        std::optional<IndexLocation> GetColumnIndexLocation() const;

        /// \brief Location of the offset index (page index), if present.
        std::optional<IndexLocation> GetOffsetIndexLocation() const;

    private:
        explicit ColumnChunkMetaData(
            const void *metadata, const ColumnDescriptor *descr, int16_t row_group_ordinal,
            int16_t column_ordinal, const ReaderProperties &properties,
            const ApplicationVersion *writer_version = nullptr,
            std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr);

        // PIMPL Idiom
        class ColumnChunkMetaDataImpl;
        std::unique_ptr<ColumnChunkMetaDataImpl> impl_;
    };

    /// \brief RowGroupMetaData is a proxy around format::RowGroupMetaData.
    class PARQUET_EXPORT RowGroupMetaData {
    public:
        TURBO_DEPRECATED("Use the ReaderProperties-taking overload")

        static std::unique_ptr<RowGroupMetaData> Make(
            const void *metadata, const SchemaDescriptor *schema,
            const ApplicationVersion *writer_version,
            std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr);

        /// \brief Create a RowGroupMetaData from a serialized thrift message.
        static std::unique_ptr<RowGroupMetaData> Make(
            const void *metadata, const SchemaDescriptor *schema,
            const ReaderProperties &properties = default_reader_properties(),
            const ApplicationVersion *writer_version = nullptr,
            std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr);

        ~RowGroupMetaData();

        /// \brief Structural equality against another row group's metadata.
        bool Equals(const RowGroupMetaData &other) const;

        /// \brief The number of columns in this row group. The order must match the
        /// parent's column ordering.
        int num_columns() const;

        /// \brief Return the ColumnChunkMetaData of the corresponding column ordinal.
        ///
        /// WARNING, the returned object references a memory location in its parent
        /// (RowGroupMetaData) object. Hence, the parent must outlive the returned
        /// object.
        ///
        /// \param[in] index of the ColumnChunkMetaData to retrieve.
        ///
        /// \throws ParquetException if the index is out of bound.
        std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int index) const;

        /// \brief Number of rows in this row group.
        int64_t num_rows() const;

        /// \brief Total byte size of all the uncompressed column data in this row group.
        int64_t total_byte_size() const;

        /// \brief Total byte size of all the compressed (and potentially encrypted)
        /// column data in this row group.
        ///
        /// This information is optional and may be 0 if omitted.
        int64_t total_compressed_size() const;

        /// \brief Byte offset from beginning of file to first page (data or
        /// dictionary) in this row group
        ///
        /// The file_offset field that this method exposes is optional. This method
        /// will return 0 if that field is not set to a meaningful value.
        int64_t file_offset() const;

        // Return const-pointer to make it clear that this object is not to be copied
        const SchemaDescriptor *schema() const;

        // Indicate if all of the RowGroup's ColumnChunks can be decompressed.
        bool can_decompress() const;

        // Sorting columns of the row group if any.
        std::vector<SortingColumn> sorting_columns() const;

    private:
        explicit RowGroupMetaData(
            const void *metadata, const SchemaDescriptor *schema,
            const ReaderProperties &properties,
            const ApplicationVersion *writer_version = nullptr,
            std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr);

        // PIMPL Idiom
        class RowGroupMetaDataImpl;
        std::unique_ptr<RowGroupMetaDataImpl> impl_;
    };

    class FileMetaDataBuilder;

    /// \brief FileMetaData is a proxy around format::FileMetaData.
    class PARQUET_EXPORT FileMetaData {
    public:
        TURBO_DEPRECATED("Use the ReaderProperties-taking overload")

        static std::shared_ptr<FileMetaData> Make(
            const void *serialized_metadata, uint32_t *inout_metadata_len,
            std::shared_ptr<InternalFileDecryptor> file_decryptor);

        /// \brief Create a FileMetaData from a serialized thrift message.
        ///
        /// On input, *inout_metadata_len is the available length of the buffer;
        /// the implementation may update it (e.g. to the consumed length).
        static std::shared_ptr<FileMetaData> Make(
            const void *serialized_metadata, uint32_t *inout_metadata_len,
            const ReaderProperties &properties = default_reader_properties(),
            std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr);

        ~FileMetaData();

        /// \brief Structural equality against another file's metadata.
        bool Equals(const FileMetaData &other) const;

        /// \brief The number of parquet "leaf" columns.
        ///
        /// Parquet thrift definition requires that nested schema elements are
        /// flattened. This method returns the number of columns in the flattened
        /// version.
        /// For instance, if the schema looks like this :
        /// 0 foo.bar
        ///       foo.bar.baz           0
        ///       foo.bar.baz2          1
        ///   foo.qux                   2
        /// 1 foo2                      3
        /// 2 foo3                      4
        /// This method will return 5, because there are 5 "leaf" fields (so 5
        /// flattened fields)
        int num_columns() const;

        /// \brief The number of flattened schema elements.
        ///
        /// Parquet thrift definition requires that nested schema elements are
        /// flattened. This method returns the total number of elements in the
        /// flattened list.
        int num_schema_elements() const;

        /// \brief The total number of rows.
        ///
        /// If the FileMetaData was obtained by calling `Subset()`, this is the total
        /// number of rows in the selected row groups.
        int64_t num_rows() const;

        /// \brief The number of row groups in the file.
        ///
        /// If the FileMetaData was obtained by calling `Subset()`, this is the number
        /// of selected row groups.
        int num_row_groups() const;

        /// \brief Return the RowGroupMetaData of the corresponding row group ordinal.
        ///
        /// WARNING, the returned object references a memory location in its parent
        /// (FileMetaData) object. Hence, the parent must outlive the returned object.
        ///
        /// \param[in] index of the RowGroup to retrieve.
        ///
        /// \throws ParquetException if the index is out of bound.
        std::unique_ptr<RowGroupMetaData> RowGroup(int index) const;

        /// \brief Return the "version" of the file
        ///
        /// WARNING: The value returned by this method is unreliable as 1) the Parquet
        /// file metadata stores the version as a single integer and 2) some producers
        /// are known to always write a hardcoded value.  Therefore, you cannot use
        /// this value to know which features are used in the file.
        ParquetVersion::type version() const;

        /// \brief Return the application's user-agent string of the writer.
        const std::string &created_by() const;

        /// \brief Return the application's version of the writer.
        const ApplicationVersion &writer_version() const;

        /// \brief Size of the original thrift encoded metadata footer.
        uint32_t size() const;

        /// \brief Indicate if all of the FileMetaData's RowGroups can be decompressed.
        ///
        /// This will return false if any of the RowGroup's page is compressed with a
        /// compression format which is not compiled in the current parquet library.
        bool can_decompress() const;

        /// \brief True if an encryption algorithm is recorded in the footer.
        bool is_encryption_algorithm_set() const;

        /// \brief The encryption algorithm; check is_encryption_algorithm_set() first.
        EncryptionAlgorithm encryption_algorithm() const;

        /// \brief Key metadata for the footer signing key.
        const std::string &footer_signing_key_metadata() const;

        /// \brief Verify signature of FileMetaData when file is encrypted but footer
        /// is not encrypted (plaintext footer).
        bool VerifySignature(const void *signature);

        /// \brief Serialize the metadata to the given stream, optionally
        /// encrypting it with the given encryptor.
        void WriteTo(::nebula::io::OutputStream *dst,
                     const std::shared_ptr<Encryptor> &encryptor = nullptr) const;

        /// \brief Return Thrift-serialized representation of the metadata as a
        /// string
        std::string SerializeToString() const;

        // Return const-pointer to make it clear that this object is not to be copied
        const SchemaDescriptor *schema() const;

        /// \brief Application-defined key-value metadata stored in the footer.
        const std::shared_ptr<const KeyValueMetadata> &key_value_metadata() const;

        /// \brief Set a path to all ColumnChunk for all RowGroups.
        ///
        /// Commonly used by systems (Dask, Spark) that generate a metadata-only
        /// parquet file. The path is usually relative to said index file.
        ///
        /// \param[in] path to set.
        void set_file_path(const std::string &path);

        /// \brief Merge row groups from another metadata file into this one.
        ///
        /// The schema of the input FileMetaData must be equal to the
        /// schema of this object.
        ///
        /// This is used by systems that create an aggregate metadata-only file by
        /// concatenating the row groups of multiple files. This newly created
        /// metadata file acts as an index of all available row groups.
        ///
        /// \param[in] other FileMetaData to merge the row groups from.
        ///
        /// \throws ParquetException if schemas are not equal.
        void AppendRowGroups(const FileMetaData &other);

        /// \brief Return a FileMetaData containing a subset of the row groups in this
        /// FileMetaData.
        std::shared_ptr<FileMetaData> Subset(const std::vector<int> &row_groups) const;

    private:
        friend FileMetaDataBuilder;
        friend class SerializedFile;
        friend class SerializedRowGroup;

        explicit FileMetaData(const void *serialized_metadata, uint32_t *metadata_len,
                              const ReaderProperties &properties,
                              std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr);

        void set_file_decryptor(std::shared_ptr<InternalFileDecryptor> file_decryptor);

        const std::shared_ptr<InternalFileDecryptor> &file_decryptor() const;

        // PIMPL Idiom
        FileMetaData();

        class FileMetaDataImpl;
        std::unique_ptr<FileMetaDataImpl> impl_;
    };

    /// \brief File-level encryption metadata (used with encrypted footers),
    /// decoded from a serialized thrift message.
    class PARQUET_EXPORT FileCryptoMetaData {
    public:
        // API convenience to get a MetaData accessor
        static std::shared_ptr<FileCryptoMetaData> Make(
            const uint8_t *serialized_metadata, uint32_t *metadata_len,
            const ReaderProperties &properties = default_reader_properties());

        ~FileCryptoMetaData();

        /// \brief Algorithm used to encrypt the file.
        EncryptionAlgorithm encryption_algorithm() const;

        /// \brief Key metadata identifying the footer key.
        const std::string &key_metadata() const;

        /// \brief Serialize this crypto metadata to the given stream.
        void WriteTo(::nebula::io::OutputStream *dst) const;

    private:
        friend FileMetaDataBuilder;

        FileCryptoMetaData(const uint8_t *serialized_metadata, uint32_t *metadata_len,
                           const ReaderProperties &properties);

        // PIMPL Idiom
        FileCryptoMetaData();

        class FileCryptoMetaDataImpl;
        std::unique_ptr<FileCryptoMetaDataImpl> impl_;
    };

    // Builder API
    /// \brief Accumulates metadata for a single column chunk while it is being
    /// written, then serializes it on Finish()/WriteTo().
    class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
    public:
        // API convenience to get a MetaData reader
        static std::unique_ptr<ColumnChunkMetaDataBuilder> Make(
            std::shared_ptr<WriterProperties> props, const ColumnDescriptor *column);

        static std::unique_ptr<ColumnChunkMetaDataBuilder> Make(
            std::shared_ptr<WriterProperties> props, const ColumnDescriptor *column,
            void *contents);

        ~ColumnChunkMetaDataBuilder();

        // column chunk
        // Used when a dataset is spread across multiple files
        void set_file_path(const std::string &path);

        // column metadata
        /// \brief Attach encoded statistics for this chunk.
        void SetStatistics(const EncodedStatistics &stats);

        // get the column descriptor
        const ColumnDescriptor *descr() const;

        /// \brief Total compressed size accumulated so far.
        int64_t total_compressed_size() const;

        // commit the metadata

        /// \brief Finalize the chunk's metadata with the given page offsets, sizes
        /// and per-encoding page counts; encryptor (if non-null) is used to
        /// protect the serialized metadata.
        void Finish(int64_t num_values, int64_t dictionary_page_offset,
                    int64_t index_page_offset, int64_t data_page_offset,
                    int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
                    bool dictionary_fallback,
                    const std::map<Encoding::type, int32_t> &dict_encoding_stats_,
                    const std::map<Encoding::type, int32_t> &data_encoding_stats_,
                    const std::shared_ptr<Encryptor> &encryptor = nullptr);

        // The metadata contents, suitable for passing to ColumnChunkMetaData::Make
        const void *contents() const;

        // For writing metadata at end of column chunk
        void WriteTo(::nebula::io::OutputStream *sink);

    private:
        explicit ColumnChunkMetaDataBuilder(std::shared_ptr<WriterProperties> props,
                                            const ColumnDescriptor *column);

        explicit ColumnChunkMetaDataBuilder(std::shared_ptr<WriterProperties> props,
                                            const ColumnDescriptor *column, void *contents);

        // PIMPL Idiom
        class ColumnChunkMetaDataBuilderImpl;
        std::unique_ptr<ColumnChunkMetaDataBuilderImpl> impl_;
    };

    /// \brief Accumulates metadata for a row group while it is being written,
    /// handing out one ColumnChunkMetaDataBuilder per column chunk.
    class PARQUET_EXPORT RowGroupMetaDataBuilder {
    public:
        // API convenience to get a MetaData reader
        static std::unique_ptr<RowGroupMetaDataBuilder> Make(
            std::shared_ptr<WriterProperties> props, const SchemaDescriptor *schema_,
            void *contents);

        ~RowGroupMetaDataBuilder();

        /// \brief Advance to (and return) the builder for the next column chunk.
        ColumnChunkMetaDataBuilder *NextColumnChunk();

        int num_columns();

        int64_t num_rows();

        /// \brief Ordinal of the column chunk currently being built.
        int current_column() const;

        void set_num_rows(int64_t num_rows);

        // commit the metadata
        void Finish(int64_t total_bytes_written, int16_t row_group_ordinal = -1);

    private:
        explicit RowGroupMetaDataBuilder(std::shared_ptr<WriterProperties> props,
                                         const SchemaDescriptor *schema_, void *contents);

        // PIMPL Idiom
        class RowGroupMetaDataBuilderImpl;
        std::unique_ptr<RowGroupMetaDataBuilderImpl> impl_;
    };

    /// \brief Public struct for location to all page indexes in a parquet file.
    struct PageIndexLocation {
        /// Alias type of page index location of a row group. The index location
        /// is located by column ordinal. If the column does not have the page index,
        /// its value is set to std::nullopt.
        using RowGroupIndexLocation = std::vector<std::optional<IndexLocation> >;
        /// Alias type of page index location of a parquet file. The index location
        /// is located by the row group ordinal.
        using FileIndexLocation = std::map<size_t, RowGroupIndexLocation>;
        /// Row group column index locations which uses row group ordinal as the key.
        FileIndexLocation column_index_location;
        /// Row group offset index locations which uses row group ordinal as the key.
        FileIndexLocation offset_index_location;
    };

    /// \brief Accumulates file-level metadata (row groups, page index locations,
    /// key-value metadata) while a file is being written; Finish() produces the
    /// final FileMetaData.
    class PARQUET_EXPORT FileMetaDataBuilder {
    public:
        TURBO_DEPRECATED("Deprecated in 12.0.0. Use overload without KeyValueMetadata instead.")

        static std::unique_ptr<FileMetaDataBuilder> Make(
            const SchemaDescriptor *schema, std::shared_ptr<WriterProperties> props,
            std::shared_ptr<const KeyValueMetadata> key_value_metadata);

        // API convenience to get a MetaData builder
        static std::unique_ptr<FileMetaDataBuilder> Make(
            const SchemaDescriptor *schema, std::shared_ptr<WriterProperties> props);

        ~FileMetaDataBuilder();

        // The prior RowGroupMetaDataBuilder (if any) is destroyed
        RowGroupMetaDataBuilder *AppendRowGroup();

        // Update location to all page indexes in the parquet file
        void SetPageIndexLocation(const PageIndexLocation &location);

        // Complete the Thrift structure
        std::unique_ptr<FileMetaData> Finish(
            const std::shared_ptr<const KeyValueMetadata> &key_value_metadata = nullptr);

        // crypto metadata
        std::unique_ptr<FileCryptoMetaData> GetCryptoMetaData();

    private:
        explicit FileMetaDataBuilder(
            const SchemaDescriptor *schema, std::shared_ptr<WriterProperties> props,
            std::shared_ptr<const KeyValueMetadata> key_value_metadata = nullptr);

        // PIMPL Idiom
        class FileMetaDataBuilderImpl;
        std::unique_ptr<FileMetaDataBuilderImpl> impl_;
    };

    /// \brief Return a human-readable string for the given Parquet format version.
    PARQUET_EXPORT std::string ParquetVersionToString(ParquetVersion::type ver);
} // namespace parquet
